{
 "metadata": {
  "name": "",
  "signature": "sha256:3b7c0da066835df2d372d5f622fbdba586eba2c48933fc5c4abb3750e5aaa882"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data = \"\"\"type,price,quantity\n",
      "Apples\n",
      "Cortland,0.30,24\n",
      "Red Delicious,0.40,24\n",
      "Oranges\n",
      "Navel,0.50,12\n",
      "\"\"\""
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import petl.interactive as etl\n",
      "from petl.io import StringSource"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "tbl1 = (etl\n",
      "    .fromcsv(StringSource(data))\n",
      ")\n",
      "tbl1"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table>\r\n",
        "<thead>\r\n",
        "<tr>\r\n",
        "<th>type</th>\r\n",
        "<th>price</th>\r\n",
        "<th>quantity</th>\r\n",
        "</tr>\r\n",
        "</thead>\r\n",
        "<tbody>\r\n",
        "<tr>\r\n",
        "<td>Apples</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>Cortland</td>\r\n",
        "<td>0.30</td>\r\n",
        "<td>24</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>Red Delicious</td>\r\n",
        "<td>0.40</td>\r\n",
        "<td>24</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>Oranges</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>Navel</td>\r\n",
        "<td>0.50</td>\r\n",
        "<td>12</td>\r\n",
        "</tr>\r\n",
        "</tbody>\r\n",
        "</table>\r\n"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 3,
       "text": [
        "+-----------------+---------+------------+\n",
        "| 'type'          | 'price' | 'quantity' |\n",
        "+=================+=========+============+\n",
        "| 'Apples'        |         |            |\n",
        "+-----------------+---------+------------+\n",
        "| 'Cortland'      | '0.30'  | '24'       |\n",
        "+-----------------+---------+------------+\n",
        "| 'Red Delicious' | '0.40'  | '24'       |\n",
        "+-----------------+---------+------------+\n",
        "| 'Oranges'       |         |            |\n",
        "+-----------------+---------+------------+\n",
        "| 'Navel'         | '0.50'  | '12'       |\n",
        "+-----------------+---------+------------+"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Option 1 - using existing petl functions"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def make_room_for_category(row):\n",
      "    if len(row) == 1:\n",
      "        return (row[0], 'X', 'X', 'X')\n",
      "    else:\n",
      "        return (None,) + tuple(row)\n",
      "\n",
      "tbl2 = tbl1.rowmap(make_room_for_category, fields=['category', 'type', 'price', 'quantity'])\n",
      "tbl2"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table>\r\n",
        "<thead>\r\n",
        "<tr>\r\n",
        "<th>category</th>\r\n",
        "<th>type</th>\r\n",
        "<th>price</th>\r\n",
        "<th>quantity</th>\r\n",
        "</tr>\r\n",
        "</thead>\r\n",
        "<tbody>\r\n",
        "<tr>\r\n",
        "<td>Apples</td>\r\n",
        "<td>X</td>\r\n",
        "<td>X</td>\r\n",
        "<td>X</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>None</td>\r\n",
        "<td>Cortland</td>\r\n",
        "<td>0.30</td>\r\n",
        "<td>24</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>None</td>\r\n",
        "<td>Red Delicious</td>\r\n",
        "<td>0.40</td>\r\n",
        "<td>24</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>Oranges</td>\r\n",
        "<td>X</td>\r\n",
        "<td>X</td>\r\n",
        "<td>X</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>None</td>\r\n",
        "<td>Navel</td>\r\n",
        "<td>0.50</td>\r\n",
        "<td>12</td>\r\n",
        "</tr>\r\n",
        "</tbody>\r\n",
        "</table>\r\n"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 4,
       "text": [
        "+------------+-----------------+---------+------------+\n",
        "| 'category' | 'type'          | 'price' | 'quantity' |\n",
        "+============+=================+=========+============+\n",
        "| 'Apples'   | 'X'             | 'X'     | 'X'        |\n",
        "+------------+-----------------+---------+------------+\n",
        "| None       | 'Cortland'      | '0.30'  | '24'       |\n",
        "+------------+-----------------+---------+------------+\n",
        "| None       | 'Red Delicious' | '0.40'  | '24'       |\n",
        "+------------+-----------------+---------+------------+\n",
        "| 'Oranges'  | 'X'             | 'X'     | 'X'        |\n",
        "+------------+-----------------+---------+------------+\n",
        "| None       | 'Navel'         | '0.50'  | '12'       |\n",
        "+------------+-----------------+---------+------------+"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "tbl3 = tbl2.filldown()\n",
      "tbl3"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table>\r\n",
        "<thead>\r\n",
        "<tr>\r\n",
        "<th>category</th>\r\n",
        "<th>type</th>\r\n",
        "<th>price</th>\r\n",
        "<th>quantity</th>\r\n",
        "</tr>\r\n",
        "</thead>\r\n",
        "<tbody>\r\n",
        "<tr>\r\n",
        "<td>Apples</td>\r\n",
        "<td>X</td>\r\n",
        "<td>X</td>\r\n",
        "<td>X</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>Apples</td>\r\n",
        "<td>Cortland</td>\r\n",
        "<td>0.30</td>\r\n",
        "<td>24</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>Apples</td>\r\n",
        "<td>Red Delicious</td>\r\n",
        "<td>0.40</td>\r\n",
        "<td>24</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>Oranges</td>\r\n",
        "<td>X</td>\r\n",
        "<td>X</td>\r\n",
        "<td>X</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>Oranges</td>\r\n",
        "<td>Navel</td>\r\n",
        "<td>0.50</td>\r\n",
        "<td>12</td>\r\n",
        "</tr>\r\n",
        "</tbody>\r\n",
        "</table>\r\n"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 5,
       "text": [
        "+------------+-----------------+---------+------------+\n",
        "| 'category' | 'type'          | 'price' | 'quantity' |\n",
        "+============+=================+=========+============+\n",
        "| 'Apples'   | 'X'             | 'X'     | 'X'        |\n",
        "+------------+-----------------+---------+------------+\n",
        "| 'Apples'   | 'Cortland'      | '0.30'  | '24'       |\n",
        "+------------+-----------------+---------+------------+\n",
        "| 'Apples'   | 'Red Delicious' | '0.40'  | '24'       |\n",
        "+------------+-----------------+---------+------------+\n",
        "| 'Oranges'  | 'X'             | 'X'     | 'X'        |\n",
        "+------------+-----------------+---------+------------+\n",
        "| 'Oranges'  | 'Navel'         | '0.50'  | '12'       |\n",
        "+------------+-----------------+---------+------------+"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "tbl4 = tbl3.ne('type', 'X')\n",
      "tbl4"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table>\r\n",
        "<thead>\r\n",
        "<tr>\r\n",
        "<th>category</th>\r\n",
        "<th>type</th>\r\n",
        "<th>price</th>\r\n",
        "<th>quantity</th>\r\n",
        "</tr>\r\n",
        "</thead>\r\n",
        "<tbody>\r\n",
        "<tr>\r\n",
        "<td>Apples</td>\r\n",
        "<td>Cortland</td>\r\n",
        "<td>0.30</td>\r\n",
        "<td>24</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>Apples</td>\r\n",
        "<td>Red Delicious</td>\r\n",
        "<td>0.40</td>\r\n",
        "<td>24</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>Oranges</td>\r\n",
        "<td>Navel</td>\r\n",
        "<td>0.50</td>\r\n",
        "<td>12</td>\r\n",
        "</tr>\r\n",
        "</tbody>\r\n",
        "</table>\r\n"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 6,
       "text": [
        "+------------+-----------------+---------+------------+\n",
        "| 'category' | 'type'          | 'price' | 'quantity' |\n",
        "+============+=================+=========+============+\n",
        "| 'Apples'   | 'Cortland'      | '0.30'  | '24'       |\n",
        "+------------+-----------------+---------+------------+\n",
        "| 'Apples'   | 'Red Delicious' | '0.40'  | '24'       |\n",
        "+------------+-----------------+---------+------------+\n",
        "| 'Oranges'  | 'Navel'         | '0.50'  | '12'       |\n",
        "+------------+-----------------+---------+------------+"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Option 2 - custom transformer"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "class CustomTransformer(object):\n",
      "    \n",
      "    def __init__(self, source):\n",
      "        self.source = source\n",
      "        \n",
      "    def __iter__(self):\n",
      "        it = iter(self.source)\n",
      "        \n",
      "        # construct new header\n",
      "        source_fields = it.next()\n",
      "        out_fields = ('category',) + tuple(source_fields)\n",
      "        yield out_fields\n",
      "        \n",
      "        # transform data\n",
      "        current_category = None\n",
      "        for row in it:\n",
      "            if len(row) == 1:\n",
      "                current_category = row[0]\n",
      "            else:\n",
      "                yield (current_category,) + tuple(row)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "tbl5 = CustomTransformer(tbl1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# just so it formats nicely as HTML in the notebook...\n",
      "etl.wrap(tbl5)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<table>\r\n",
        "<thead>\r\n",
        "<tr>\r\n",
        "<th>category</th>\r\n",
        "<th>type</th>\r\n",
        "<th>price</th>\r\n",
        "<th>quantity</th>\r\n",
        "</tr>\r\n",
        "</thead>\r\n",
        "<tbody>\r\n",
        "<tr>\r\n",
        "<td>Apples</td>\r\n",
        "<td>Cortland</td>\r\n",
        "<td>0.30</td>\r\n",
        "<td>24</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>Apples</td>\r\n",
        "<td>Red Delicious</td>\r\n",
        "<td>0.40</td>\r\n",
        "<td>24</td>\r\n",
        "</tr>\r\n",
        "<tr>\r\n",
        "<td>Oranges</td>\r\n",
        "<td>Navel</td>\r\n",
        "<td>0.50</td>\r\n",
        "<td>12</td>\r\n",
        "</tr>\r\n",
        "</tbody>\r\n",
        "</table>\r\n"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 9,
       "text": [
        "+------------+-----------------+---------+------------+\n",
        "| 'category' | 'type'          | 'price' | 'quantity' |\n",
        "+============+=================+=========+============+\n",
        "| 'Apples'   | 'Cortland'      | '0.30'  | '24'       |\n",
        "+------------+-----------------+---------+------------+\n",
        "| 'Apples'   | 'Red Delicious' | '0.40'  | '24'       |\n",
        "+------------+-----------------+---------+------------+\n",
        "| 'Oranges'  | 'Navel'         | '0.50'  | '12'       |\n",
        "+------------+-----------------+---------+------------+"
       ]
      }
     ],
     "prompt_number": 9
    }
   ],
   "metadata": {}
  }
 ]
}